Here, we will cluster the cells of the patient with id “019”.
library(Seurat)
library(SeuratWrappers)
library(harmony)
library(tidyverse)
# Paths
path_to_obj <- here::here("results/R_objects/4.seurat_leukemic.rds")
path_to_save <- here::here("results/R_objects/5.seurat_clustered_19.rds")
# Colors
color_palette <- c("black", "gray", "red", "yellow", "violet", "green4",
"blue", "mediumorchid2", "coral2", "blueviolet",
"indianred4", "deepskyblue1", "dimgray", "deeppink1",
"green", "lightgray", "hotpink1")
# Source functions
source(here::here("bin/utils.R"))
# Thresholds
max_pct_mt <- 18
seurat <- readRDS(path_to_obj)
Of note, everytime that we filter out a specific population, the set of highly variable genes (HVG) and the axis of variability (PCs) changes. Thus, we need to rerun the main analysis steps iteratively.
First, we focus on the patient studied in this notebook:
seurat <- subset(seurat, donor_id == "19")
seurat <- process_seurat(seurat, dims = 1:20)
DimPlot(seurat, group.by = "subproject")
As we can see, there is a large batch effect between the two experiments we conducted BCLLATLAS_10 and BCLLATLAS_29. As we know, in BCLLATLAS_10 we obtain single-cell transcriptomes for serial clinical samples of 4 donors. However, as we observed that some points were underrepresented (had less cells), we performed scRNA-seq of specific cases (BCLLATLAS_29). Thus, we will initially work with BCLLATLAS_10, and use BCLLATLAS_29 later as a validation.
seurat <- subset(seurat, subproject == "BCLLATLAS_10")
seurat <- process_seurat(seurat, dims = 1:20)
DimPlot(seurat, group.by = "subproject")
FeaturePlot(seurat, "pct_mt") +
scale_color_viridis_c(option = "magma")
FeaturePlot(seurat, "nFeature_RNA") +
scale_color_viridis_c(option = "magma")
We see a subpopulation with characteristics of low-quality cells: high mitochondrial expression and low number of detected features (genes). Hence, let us exclude it from the dataset:
seurat <- FindNeighbors(seurat, reduction = "pca", dims = 1:20)
seurat <- FindClusters(seurat, resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8286
## Number of edges: 274795
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8626
## Number of communities: 5
## Elapsed time: 1 seconds
DimPlot(seurat)
seurat <- subset(seurat, seurat_clusters != "3")
seurat <- subset(seurat, pct_mt < max_pct_mt)
seurat <- process_seurat(seurat, dims = 1:20)
DimPlot(seurat)
DimPlot(seurat, group.by = "time_point")
seurat <- FindNeighbors(seurat, reduction = "pca", dims = 1:20)
seurat <- FindClusters(seurat, resolution = 0.6)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 7435
## Number of edges: 248190
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7749
## Number of communities: 9
## Elapsed time: 1 seconds
DimPlot(seurat)
# Is cluster 7 composed of poor-quality cells?
markers_7 <- FindMarkers(
seurat, ident.1 = "7",
only.pos = TRUE,
logfc.threshold = 0.3
)
DT::datatable(markers_7)
As we can see, the fold-changes are very small and the cells are scattered all over the place. Thus, we will remove cluster 7 and recluster:
seurat <- subset(seurat, seurat_clusters != "7")
seurat <- FindNeighbors(seurat, reduction = "pca", dims = 1:20)
seurat <- FindClusters(seurat, resolution = 0.6)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 7284
## Number of edges: 245726
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7773
## Number of communities: 8
## Elapsed time: 1 seconds
DimPlot(seurat)
DimPlot(seurat, group.by = "time_point")
FeaturePlot(seurat, "CXCR4") +
scale_color_viridis_c(option = "magma")
Cluster 1 and 2 correspond to different time-points but to the same underlying subpopulation (CXCR4+ cells). Thus, we will consider it as a single cluster and rename the others.
Similarly, clusters 2 and 4 do not have distinctive markers (see that fold changes are very small):
markers_rt <- FindMarkers(
seurat,
ident.1 = "2",
ident.2 = "4",
only.pos = FALSE
)
DT::datatable(arrange(markers_rt, desc(avg_log2FC)))
Rename clusters:
seurat$final_clusters <- as.character(seurat$seurat_clusters)
seurat$final_clusters <- factor(case_when(
seurat$final_clusters == "0" ~ "0",
seurat$final_clusters == "1" ~ "1",
seurat$final_clusters == "2" ~ "1",
seurat$final_clusters == "3" ~ "2",
seurat$final_clusters == "4" ~ "2",
seurat$final_clusters == "5" ~ "3",
seurat$final_clusters == "6" ~ "4",
seurat$final_clusters == "7" ~ "5"
))
Idents(seurat) <- "final_clusters"
DimPlot(seurat, cols = color_palette)
saveRDS(seurat, path_to_save)
sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=es_ES.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=es_ES.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=es_ES.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.6 purrr_0.3.4 readr_1.4.0 tidyr_1.1.3 tibble_3.1.2 ggplot2_3.3.3 tidyverse_1.3.1 harmony_1.0 Rcpp_1.0.6 SeuratWrappers_0.3.0 SeuratObject_4.0.2 Seurat_4.0.3 BiocStyle_2.18.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1 plyr_1.8.6 igraph_1.2.6 lazyeval_0.2.2 splines_4.0.4 crosstalk_1.1.1 listenv_0.8.0 scattermore_0.7 digest_0.6.27 htmltools_0.5.1.1 fansi_0.5.0 magrittr_2.0.1 tensor_1.5 cluster_2.1.1 ROCR_1.0-11 limma_3.46.0 remotes_2.4.0 globals_0.14.0 modelr_0.1.8 matrixStats_0.59.0 spatstat.sparse_2.0-0 colorspace_2.0-1 rvest_1.0.0 ggrepel_0.9.1 haven_2.4.1 xfun_0.23 crayon_1.4.1 jsonlite_1.7.2 spatstat.data_2.1-0 survival_3.2-10 zoo_1.8-9 glue_1.4.2 polyclip_1.10-0 gtable_0.3.0 leiden_0.3.8 future.apply_1.7.0 abind_1.4-5 scales_1.1.1 DBI_1.1.1 miniUI_0.1.1.1 viridisLite_0.4.0 xtable_1.8-4 reticulate_1.20 spatstat.core_2.1-2 rsvd_1.0.5 DT_0.18 htmlwidgets_1.5.3 httr_1.4.2 RColorBrewer_1.1-2 ellipsis_0.3.2 ica_1.0-2 farver_2.1.0 pkgconfig_2.0.3
## [55] sass_0.4.0 uwot_0.1.10 dbplyr_2.1.1 deldir_0.2-10 here_1.0.1 utf8_1.2.1 labeling_0.4.2 tidyselect_1.1.1 rlang_0.4.11 reshape2_1.4.4 later_1.2.0 munsell_0.5.0 cellranger_1.1.0 tools_4.0.4 cli_2.5.0 generics_0.1.0 broom_0.7.7 ggridges_0.5.3 evaluate_0.14 fastmap_1.1.0 yaml_2.2.1 goftest_1.2-2 knitr_1.33 fs_1.5.0 fitdistrplus_1.1-5 RANN_2.6.1 pbapply_1.4-3 future_1.21.0 nlme_3.1-152 mime_0.10 xml2_1.3.2 compiler_4.0.4 rstudioapi_0.13 plotly_4.9.4 png_0.1-7 spatstat.utils_2.2-0 reprex_2.0.0 bslib_0.2.5.1 stringi_1.6.2 highr_0.9 RSpectra_0.16-0 lattice_0.20-41 Matrix_1.3-4 vctrs_0.3.8 pillar_1.6.1 lifecycle_1.0.0 BiocManager_1.30.15 spatstat.geom_2.1-0 lmtest_0.9-38 jquerylib_0.1.4 RcppAnnoy_0.0.18 data.table_1.14.0 cowplot_1.1.1 irlba_2.3.3
## [109] httpuv_1.6.1 patchwork_1.1.1 R6_2.5.0 bookdown_0.22 promises_1.2.0.1 KernSmooth_2.23-18 gridExtra_2.3 parallelly_1.26.0 codetools_0.2-18 MASS_7.3-53.1 assertthat_0.2.1 rprojroot_2.0.2 withr_2.4.2 sctransform_0.3.2 mgcv_1.8-36 parallel_4.0.4 hms_1.1.0 grid_4.0.4 rpart_4.1-15 rmarkdown_2.8 Rtsne_0.15 shiny_1.6.0 lubridate_1.7.10